## "replication_main.R" from replication package for "Registering Returning Citizens to Vote"
## This script reproduces the main analyses (Table 1 and Table 2) from the paper and SI Tables A1 and A10

rm(list = ls())
library(stargazer)
## starpolishr needs to be installed with:
# devtools::install_github("ChandlerLutz/starpolishr")
library(starpolishr)
library(magrittr)
library(readr)
library(xtable)
library(tidyverse)

###########################################################################
# Prepare data
# Note that for the main analysis, we only need data from NC
###########################################################################
main <- read_csv("main_paper_2025.csv")

# extract north carolina data
main <- main[main$list == "NCmainlist", ]; dim(main) 

# set up indicator for any treatment (ie not control), rather than separate arms
main$anytreat <- 1; 
main[main$Treatment == "T1", "anytreat"] <- 0; table(main$anytreat)

###########################################################################
# Table 1 of main paper
###########################################################################

######################################
# analysis
######################################
NCarms1 <- lm(reg2020 ~ as.factor(Treatment), data = main[main$list == "NCmainlist",]); summary(NCarms1)
NCarms2 <- lm(reg2020 ~ as.factor(Treatment) + ageyears + male + wrublack + pastincarc, data = main[main$list == "NCmainlist",]); summary(NCarms2)

NCvoted1 <- lm(voted2020gen ~ as.factor(Treatment), data = main[main$list == "NCmainlist",]); summary(NCvoted1)
NCvoted2 <- lm(voted2020gen ~ as.factor(Treatment) + ageyears + male + wrublack + pastincarc, data = main[main$list == "NCmainlist",]); summary(NCvoted2)


# analysis
NCregany1 <- lm(reg2020 ~ anytreat , data = main[main$list == "NCmainlist",]); summary(NCregany1)
NCregany2 <- lm(reg2020 ~ anytreat + ageyears + male + wrublack + pastincarc, data = main[main$list == "NCmainlist",]); summary(NCregany2)
NCvotedany1 <- lm(voted2020gen ~ anytreat , data = main[main$list == "NCmainlist",]); summary(NCvotedany1)
NCvotedany2 <- lm(voted2020gen ~ anytreat + ageyears + male + wrublack + pastincarc, data = main[main$list == "NCmainlist",]); summary(NCvotedany2)


######################################
# alternate table format: simpler
######################################


# create a table without paneled structure
NCregturnoutmain <- stargazer(NCarms1, NCarms2,NCregany2, NCvoted1, NCvoted2,NCvotedany2,
  omit.stat = c("f", "rsq", "adj.rsq", "ser"),
  omit = c("ageyears", "male", "wrublack", "pastincarc"),
  label = "tab:s4regvote",
  covariate.labels = c("Basic mailer", "No crim. record framing", "No registration form", "Extra civil rights framing", "Any Treatment Mailer"),
  dep.var.labels = c("Voter Registration", "Voted November 2020"),
  font.size = "footnotesize",
  title = "Effects on Voter Registration and Turnout",
  add.lines = list(
    c("Covariates", "", "X", "X", "", "X", "X")
  ),
  notes = "\\parbox[t]{10cm}{This table shows the effect of each treatment (relative to the control), as well as pooled treatment arms relative to control, on voter registration by November 2020 and subsequent turnout.}",
  #notes.append = FALSE, 
  notes.align = "l",digits=1,
 #star.cutoffs = c(0.05), 
 out = "mainstudy_treatmenteffectests_anytreat_NCregturnout.tex"
)



###########################################################################
# Table 2 of main paper
###########################################################################

######################################
# analysis
######################################
NC_blackonly1 <- lm(reg2020 ~ as.factor(Treatment), data = main[main$wrublack == 1 & main$list == "NCmainlist",]); summary(NC_blackonly1) 
NC_whiteonly1 <- lm(reg2020 ~ as.factor(Treatment), data = main[main$wruwhite == 1 & main$list == "NCmainlist",]); summary(NC_whiteonly1) 
NC_racialhet3 <- lm(reg2020 ~ as.factor(Treatment) + wrublack + as.factor(Treatment)*wrublack, 
                   data = main[main$list == "NCmainlist" & main$wrurace %in% c("black", "white"),]); summary(NC_racialhet3) #and do a version with only B/W in last column.

#  do we want to do a pooled ("anytreat" version?)
NC_blackonly1any <- lm(reg2020 ~ anytreat, data = main[main$wrublack == 1 & main$list == "NCmainlist",]); summary(NC_blackonly1any) 
NC_whiteonly1any <- lm(reg2020 ~ anytreat, data = main[main$wruwhite == 1 & main$list == "NCmainlist",]); summary(NC_whiteonly1any) 

NC_racialhet3any <- lm(reg2020 ~ anytreat + wrublack + anytreat*wrublack, 
                       data = main[main$list == "NCmainlist" & main$wrurace %in% c("black", "white"),]); summary(NC_racialhet3any)  #similar

#and also turnout not just reg?
NC_blackonly1anyv <- lm(voted2020gen ~ anytreat, data = main[main$wrublack == 1 & main$list == "NCmainlist",]); summary(NC_blackonly1anyv) 
NC_whiteonly1anyv <- lm(voted2020gen ~ anytreat, data = main[main$wruwhite == 1 & main$list == "NCmainlist",]); summary(NC_whiteonly1anyv) 

NC_racialhet3anyv <- lm(voted2020gen ~ anytreat + wrublack + anytreat*wrublack, 
                       data = main[main$list == "NCmainlist" & main$wrurace %in% c("black", "white"),]); summary(NC_racialhet3anyv)  #similar

#hypothesis test for R1 
library(car)
linearHypothesis(NC_racialhet3anyv, "anytreat + anytreat:wrublack = 0")
linearHypothesis(NC_racialhet3anyv, "anytreat:wrublack = 0")

######################################
#simpler table version
######################################

## May 2025 note:
## Shortening model names to produce this simpler table version
## Since in R version 4.5.0 (2025-04-11) otherwise get an error:
## "Error in if (is.na(s)) { : the condition has length > 1"

b1 <- lm(reg2020 ~ anytreat, data = main[main$wrublack == 1 & main$list == "NCmainlist",])
w1 <- lm(reg2020 ~ anytreat, data = main[main$wruwhite == 1 & main$list == "NCmainlist",])
h3 <- lm(reg2020 ~ anytreat + wrublack + anytreat*wrublack,
         data = main[main$list == "NCmainlist" & main$wrurace %in% c("black", "white"),])

b1v <- lm(voted2020gen ~ anytreat, data = main[main$wrublack == 1 & main$list == "NCmainlist",])
w1v <- lm(voted2020gen ~ anytreat, data = main[main$wruwhite == 1 & main$list == "NCmainlist",])
h3v <- lm(voted2020gen ~ anytreat + wrublack + anytreat*wrublack,
          data = main[main$list == "NCmainlist" & main$wrurace %in% c("black", "white"),])

# create single table
stargazer(
  #NC_blackonly1any, NC_whiteonly1any, NC_racialhet3any, NC_blackonly1anyv, NC_whiteonly1anyv, NC_racialhet3anyv, 
  b1, w1, h3, b1v, w1v, h3v,
          out="mainstudy_treatmenteffectests_raceinteraction_NConly_anytreat.tex",
	covariate.labels=c("Any Treatment Mailer", 
	                   "Black", "Any Treatment Mailer * Black"), 
	title="Racial Heterogeneity", column.labels=c("Black" ,"White","Both","Black" ,"White","Both"),
	dep.var.labels=c("Voter Registration", "Voted November 2020"), font.size = "footnotesize",
	label = "tab:race_interact",digits=1,
	omit.stat=c("f","rsq","adj.rsq","ser"),
notes = "\\parbox[t]{9cm}{This table shows the effect of the treatment (sending a mailer) on voter registration/turnout by race. }",
  notes.align = "l"#,
)

############################################################################################
## Table A1
############################################################################################

# extract north carolina data
main <- main[(main$list == "NCmainlist"), ]; dim(main) 
main$dayssincerelease <- as.Date("2020-11-03")- main$lastincarcend

# run models
age_test <- lm(ageyears ~ as.factor(Treatment) , data=main)
black_test <- lm(wrublack ~ as.factor(Treatment) , data=main); summary(black_test)
male_test <- lm(male ~ as.factor(Treatment) , data=main); summary(male_test)
pastincarc_test <- lm(pastincarc ~ as.factor(Treatment) , data=main); summary(pastincarc_test)
out_test <- lm(as.numeric(dayssincerelease) ~ as.factor(Treatment) , data=main); summary(out_test)

#now build a table 
model.summary <- coef(summary(male_test))[, 1:2]
model.summary[,2] <- paste("(",sprintf("%.2f", round(model.summary[,2],2)), ")", sep="")
model.summary[1,2] <- ""
model.summary[,1] <- sprintf("%.2f",round(as.numeric(model.summary[,1]),2))
fstat <- summary(male_test)$fstatistic #lm summary doesn't output the p-val for joint f-test so will need to calculate it
model.summary <-rbind(model.summary, c(sprintf("%.2f",pf(fstat[1], fstat[2], fstat[3], lower.tail=FALSE)),"")) #add a row with f-test pval
allmodels <- list(model.summary) #okay, so this is one model (which list are you in)

models <- c( "age_test", "black_test", "pastincarc_test", "out_test") #now bring in all the other covariate models
for (i in 1:length(models)) { #loop over them and format in the same way as above
  model.sum <- coef(summary(get(models[i])))[, 1:2]
  model.sum[,2] <- paste("(",sprintf("%.2f", round(model.sum[,2], 2)), ")", sep="")
  model.sum[1,2] <- ""
  model.sum[,1] <- sprintf("%.2f",round(as.numeric(model.sum[,1]),2))
  fstat <- summary(get(models[i]))$fstatistic # get the model output
  model.sum <-rbind(model.sum, c(sprintf("%.2f",pf(fstat[1], fstat[2], fstat[3], lower.tail=FALSE),2),"")) 
	allmodels[[i+1]] <- model.sum 
}


modeltab <- do.call("cbind", allmodels) #now stick all together
modeltab <-cbind(modeltab, 
                c(nrow(main[main$Treatment=="T1",]), 
                  nrow(main[main$Treatment=="T2",]), 
                  nrow(main[main$Treatment=="T3",]), 
                  nrow(main[main$Treatment=="T4",]), 
                  nrow(main[main$Treatment=="T5",]), 
                  "" )
                ) #add row with obs counts

# create table
modeltable <- xtable(t(modeltab), 
                    label = "tab:sumstats", 
                    caption="Main Study: Covariate balance across treatment arms",
                    align = "lcccccc") 

colnames(modeltable) <- c("Control Mean", "Basic Mailer", "No CR Framing", "No Reg. Form", "Civil Rights Framing", "Joint F-test p-val")
rownames(modeltable) <- c("Male", " ", "Age", "  ", "Black", "   ", "Past Incarc.", "    ",  "Days since release", "      ", "Observations")

addtorow <- list(list(-1), c("\\hline \n & & \\multicolumn{4}{c}{Difference from Control} & \n &\\cline{3-6} \n"))

print(modeltable,
      caption.placement = "top",
      add.to.row = addtorow,
      hline.after = c(0, 10), 
      file="mainstudy_balancetestsbyarm_Ftests_NConly_lightversion.tex")



############################################################################################
## Table A10
############################################################################################

main <- read_csv("main_paper_2025.csv") #read in full dataset again

# set up indicator for any treatment (ie not control), rather than separate arms
main$anytreat <- 1; 
main[main$Treatment == "T1", "anytreat"] <- 0; table(main$anytreat)

# analysis
NCarms1 <- lm(reg2020 ~ as.factor(Treatment), data=main[main$list=="NCmainlist",]); summary(NCarms1)
TXarms1 <- lm(reg2020 ~ as.factor(Treatment), data=main[main$list=="TXmainlist",]); summary(TXarms1)
botharms1 <- lm(reg2020 ~ as.factor(Treatment), data=main); summary(botharms1)

NCvoted1 <- lm(voted2020gen ~ as.factor(Treatment), data=main[main$list=="NCmainlist",]); summary(NCvoted1)
TXvoted1 <- lm(voted2020gen ~ as.factor(Treatment), data=main[main$list=="TXmainlist",]); summary(TXvoted1)
allvoted1 <- lm(voted2020gen ~ as.factor(Treatment), data=main); summary(allvoted1)


NCregany1 <- lm(reg2020 ~ anytreat, data=main[main$list=="NCmainlist",]); summary(NCregany1)
TXregany1 <- lm(reg2020 ~ anytreat, data=main[main$list=="TXmainlist",]); summary(TXregany1) #we'll want these 
bothregany1 <- lm(reg2020 ~ anytreat, data=main); summary(bothregany1)

NCvotedany1 <- lm(voted2020gen ~ anytreat, data=main[main$list=="NCmainlist",]); summary(NCvotedany1)
TXvotedany1 <- lm(voted2020gen ~ anytreat, data=main[main$list=="TXmainlist",]); summary(TXvotedany1)
bothvotedany1 <- lm(voted2020gen ~ anytreat, data=main); summary(bothvotedany1)


###  make main table but do for NC and TX sidebyside, plus combined?
panel1NCTXregturnout<-stargazer(NCarms1, TXarms1, botharms1, NCvoted1, TXvoted1, allvoted1,
  omit.stat=c("f","rsq","adj.rsq","ser"),
  omit = c("Constant"),digits=1,
  label = "tab:regvoteNCTX",
  covariate.labels=c("Basic mailer", 
                    "No crim. record framing", 
                    "No registration form", 
                    "Extra civil rights framing"), 
  dep.var.labels=c("Voter Registration", "Voted November 2020"), 
  column.labels = c("NC", "TX","All", "NC", "TX", "All"),
  font.size = "footnotesize",
  title = "Effects on Voter Registration and Turnout in NC and TX",
  add.lines = list(c("Control Mean", 
                    round(coef(NCregany1)[1],2),
                    round(coef(TXregany1)[1],2), 
                    round(coef(bothregany1)[1],2), 
                    round(coef(NCvotedany1)[1],2), 
                    round(coef(TXvotedany1)[1],2), 
                    round(coef(bothvotedany1)[1],2) 
                    )),
  notes = "\\parbox[t]{\\textwidth}{NOTES and stuff}",
  notes.append = FALSE,notes.align = "l")

## May 2025 note:
## Shortening model names to produce panel2NCTXregturnout
## Since in R version 4.5.0 (2025-04-11) otherwise get an error:
## "Error in if (is.na(s)) { : the condition has length > 1"

ncr1 <- lm(reg2020 ~ anytreat, data=main[main$list=="NCmainlist",])
txr1 <- lm(reg2020 ~ anytreat, data=main[main$list=="TXmainlist",])
br1 <- lm(reg2020 ~ anytreat, data=main);

ncv1 <- lm(voted2020gen ~ anytreat, data=main[main$list=="NCmainlist",])
txv1 <- lm(voted2020gen ~ anytreat, data=main[main$list=="TXmainlist",])
bv1 <- lm(voted2020gen ~ anytreat, data=main)

panel2NCTXregturnout<-stargazer(
  #NCregany1, TXregany1, bothregany1, NCvotedany1, TXvotedany1, bothvotedany1, 
  ncr1, txr1, br1, ncv1, txv1, bv1,
  title = "Effects on Voter Registration and Turnout in NC and TX",
	covariate.labels=c("Any Treatment"), 
	label = "tab:regvoteNCTX",digits=1,
	dep.var.labels=c("Voter Registration", "Voted November 2020"), 
  column.labels = c("NC", "TX","All", "NC", "TX", "All"), font.size = "footnotesize", 
	keep.stat="n", 
	notes = "\\parbox[t]{\\textwidth}{NOTES and stuff}",
	notes.append = FALSE,
  notes.align = "l",
  omit = c("Constant", "ageyears", "male", "wrublack", "pastincarc"),
  add.lines = list(c("Control Mean", 
                      round(coef(NCregany1)[1],2),
                      round(coef(TXregany1)[1],2) , 
                      round(coef(bothregany1)[1],2), 
                      round(coef(NCvotedany1)[1],2), 
                      round(coef(TXvotedany1)[1],2), 
                      round(coef(bothvotedany1)[1],2) 
                      ))
) #include control means since we omit intercept

#stick those two panels together:
twopanelNCTXregvoting <- star_panel(panel2NCTXregturnout, panel1NCTXregturnout, 
	panel.names= c("All Arms Combined", "Separate Treatment Arms"), 
	panel.label.fontface = "bold") %>%
	star_notes_tex(note.type="threeparttable", 
                  note="This table shows the effect of each treatment (relative to the control), as well as pooled treatment arms relative to control, on voter registration by November 2020 and subsequent turnout, both in our main NC sample and in a sample in TX where we encountered implementation problems. $^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01")

star_tex_write(twopanelNCTXregvoting, file ="treatmenteffectests_anytreat_NCTXregturnout_twopanel.tex", headers = FALSE)

